In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

In [2]:
%matplotlib inline
plt.rcParams['figure.figsize'] = 6, 4.5
plt.rcParams['axes.grid'] = True
plt.gray()


<matplotlib.figure.Figure at 0x7fd012b25668>

The aim of this notebook is to make a submission for this competition as quickly as possible. Just want to get something in.

Loading the data

Getting all the filenames for Dog_1 ready, going to do the analysis on this first, then iterate over the rest after that.


In [3]:
cd ~/repositories/neuroglycerin-data/Dog_1/


/home/gavin/repositories/neuroglycerin-data/Dog_1

In [4]:
import glob

In [5]:
# grab all the filenames
fnames = glob.glob("*")

In [6]:
# test file
t = fnames[0]
print(t)


Dog_1_test_segment_0353.mat

In [7]:
import scipy.io

In [8]:
matest = scipy.io.loadmat(t)

In [9]:
matest.keys()


Out[9]:
dict_keys(['__globals__', '__header__', '__version__', 'test_segment_353'])

In [10]:
matest['__version__']


Out[10]:
'1.0'

In [11]:
type(matest['test_segment_353'])


Out[11]:
numpy.ndarray

In [12]:
type(matest['test_segment_353'][0,0])


Out[12]:
numpy.void

In [13]:
# weird, try making it an array
data = np.array(matest['test_segment_353'][0,0])

So, now I'm in the situation this guy was in(looks like a decent blog) and apparently I need to just know the right key. Matlab can fuck off so much right now.


In [14]:
data.dtype


Out[14]:
dtype([('data', 'O'), ('data_length_sec', 'O'), ('sampling_frequency', 'O'), ('channels', 'O')])

In [15]:
data['data']


Out[15]:
array(array([[ 17,  22,  29, ..., -32, -17,   5],
       [ 39,  17,  -2, ...,  19,  13,   0],
       [ -9,  -5,   0, ...,  30,  33,  28],
       ..., 
       [-19, -19, -12, ..., -24,  -2,  -5],
       [-20, -11,   0, ...,   0,  -9, -12],
       [-24,  -5,   3, ...,  -2, -10, -14]], dtype=int16), dtype=object)

In [16]:
data['sampling_frequency']


Out[16]:
array(array([[ 399.6097561]]), dtype=object)

In [17]:
data['channels']


Out[17]:
array(array([[array(['NVC1202_32_002_Ecog_c001'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c002'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c003'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c004'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c005'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c006'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c007'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c008'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c009'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c010'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c011'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c012'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c013'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c014'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c015'], 
      dtype='<U24'),
        array(['NVC1202_32_002_Ecog_c016'], 
      dtype='<U24')]], dtype=object), dtype=object)

Writing a function to do the above for arbitrary files:


In [18]:
for k in matest.keys():
    print(k)


__globals__
__header__
__version__
test_segment_353

In [19]:
[k for k in matest.keys() if "segment" in k][0]


Out[19]:
'test_segment_353'

In [27]:
def fuckyoumatlab(fname):
    mat = scipy.io.loadmat(fname)
    # assuming they'll always have segment in them
    # I apologise for this
    k = [k for k in mat.keys() if "segment" in k][0]
    data = mat[k][0,0]
    # I'm turning this into a dictionary
    # just try and stop me
    datadict = {}
    for i in ['data','sampling_frequency']:
        datadict[i] = data[i]
    return datadict

In [28]:
data = fuckyoumatlab(t)

In [29]:
data.keys()


Out[29]:
dict_keys(['sampling_frequency', 'data'])

In [31]:
data['data'].shape


Out[31]:
(16, 239766)

In [33]:
data['data']


Out[33]:
array([[ 17,  22,  29, ..., -32, -17,   5],
       [ 39,  17,  -2, ...,  19,  13,   0],
       [ -9,  -5,   0, ...,  30,  33,  28],
       ..., 
       [-19, -19, -12, ..., -24,  -2,  -5],
       [-20, -11,   0, ...,   0,  -9, -12],
       [-24,  -5,   3, ...,  -2, -10, -14]], dtype=int16)

In [32]:
plt.plot(data['data'][0,0:100])


Out[32]:
[<matplotlib.lines.Line2D at 0x7fd00f830588>]

Finally, managed to load the data.

Processing

The next thing to do will be to process the data into the simplest possible feature that might be useful. That's probably PSD in each band.


In [36]:
import pylab

In [83]:
x = np.linspace(0,2*np.pi,1000)

In [87]:
s = np.sin(x*100)

In [88]:
w=np.hamming(len(s))

In [89]:
dpsd=pylab.psd(w*s,Fs=1000)



In [71]:
pylab.psd?

In [50]:
data['data'][0,:].astype(np.float)


Out[50]:
array([ 17.,  22.,  29., ..., -32., -17.,   5.])

In [57]:
fs = data['sampling_frequency'].item()

In [58]:
fs


Out[58]:
399.609756097561

In [94]:
w = np.hanning(len(data['data'][0,:].astype(np.float)))

In [97]:
dpsd=pylab.psd(data['data'][0,:].astype(np.float)*w,Fs=fs)


Comparing the PSD of a random preictal with a random interictal.

Interictal


In [99]:
data = fuckyoumatlab(fnames[2])

In [100]:
fs = data['sampling_frequency'].item()
w = np.hanning(len(data['data'][0,:].astype(np.float)))
dpsd=pylab.psd(data['data'][0,:].astype(np.float)*w,Fs=fs)


Preictal


In [102]:
prek = [k for k in fnames if "preictal" in k][0]

In [104]:
data = fuckyoumatlab(prek)

In [105]:
fs = data['sampling_frequency'].item()
w = np.hanning(len(data['data'][0,:].astype(np.float)))
dpsd=pylab.psd(data['data'][0,:].astype(np.float)*w,Fs=fs)


There's a small different at low frequencies? I bet this feature is going to totally suck.

Splitting into bands

I'm just going to go ahead and integrate these PSDs over arbitrary bands up to half the sampling frequency.


In [122]:
bandedges = np.linspace(0,data['sampling_frequency'].item()/2,num=20)

In [123]:
bandedges


Out[123]:
array([   0.        ,   10.51604621,   21.03209243,   31.54813864,
         42.06418485,   52.58023107,   63.09627728,   73.61232349,
         84.1283697 ,   94.64441592,  105.16046213,  115.67650834,
        126.19255456,  136.70860077,  147.22464698,  157.7406932 ,
        168.25673941,  178.77278562,  189.28883184,  199.80487805])

In [155]:
import scipy.integrate

In [131]:
d = data['data'][0,:].astype(np.float)

In [152]:
ps = np.abs(np.fft.fft(d*w))**2

In [153]:
freq = np.fft.fftfreq(len(d),d=1/fs)

In [154]:
plt.plot(freq,ps)


Out[154]:
[<matplotlib.lines.Line2D at 0x7fd00f281630>]

In [166]:
dfreq = {}
for f,p in zip(freq,ps):
    dfreq[f] = p

In [167]:
sfreq = np.sort(freq)

In [175]:
r = [f for f in sfreq if f > 10 and f < 100]
scipy.integrate.trapz([dfreq[f] for f in r],r)


Out[175]:
5017486217.7675982

In [ ]:
for l,h in zip(bandedges):